Inference by pitch type + location
Hold pitch traits and release point constant, while changing pitch
location.
# Average pitch type traits
cluster_summary_rr <- RHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
spinrate = mean(spinrate, na.rm = TRUE),
relspeed = mean(relspeed, na.rm = TRUE),
inducedvertbreak = mean(inducedvertbreak, na.rm = TRUE),
horzbreak = mean(horzbreak, na.rm = TRUE),
initposx = mean(initposx, na.rm = TRUE),
initposz = mean(initposz, na.rm = TRUE)
)
# strikezone
k_zone_height_max <- 3.67
k_zone_heightmin <- 1.52
sides <- c(-0.83, 0.83)
plot_heatmap_per_pitchtype <- function(model, data,
pitch_cluster, title) {
k_zone_height_max <- 3.67
k_zone_height_min <- 1.52
sides <- c(-0.83, 0.83)
cluster_summary_rr <- data %>%
filter(Cluster == pitch_cluster) %>%
summarise(
spinrate = mean(spinrate, na.rm = TRUE),
relspeed = mean(relspeed, na.rm = TRUE),
inducedvertbreak = mean(inducedvertbreak, na.rm = TRUE),
horzbreak = mean(horzbreak, na.rm = TRUE),
initposx = mean(initposx, na.rm = TRUE),
initposz = mean(initposz, na.rm = TRUE)
)
grid <- expand.grid(
platelocside = seq(sides[1], sides[2], length.out = 200),
platelocheight = seq(k_zone_height_min, k_zone_height_max, length.out = 200)
)
grid$spinrate <- cluster_summary_rr$spinrate
grid$relspeed <- cluster_summary_rr$relspeed
grid$inducedvertbreak <- cluster_summary_rr$inducedvertbreak
grid$horzbreak <- cluster_summary_rr$horzbreak
grid$initposx <- cluster_summary_rr$initposx
grid$initposz <- cluster_summary_rr$initposz
grid$Cluster <- pitch_cluster
grid$pred_prob <- predict(model, newdata = grid, type = "response")
p <- ggplot(grid, aes(platelocside, platelocheight, fill = pred_prob)) +
geom_tile() +
scale_fill_viridis_c(option = "C", direction = -1, limits = c(0, 0.5)) +
geom_rect(
aes(xmin = sides[1], xmax = sides[2],
ymin = k_zone_height_min, ymax = k_zone_height_max),
color = "white", fill = NA, linewidth = 0.8
) +
coord_equal() +
labs(
title = paste(title, "Predicted Sweet-Spot Probability — Pitch Type", pitch_cluster),
x = "Plate Side (ft)",
y = "Plate Height (ft)",
fill = "Predicted Prob."
) +
theme_minimal(base_size = 14)
return(p)
}
unique(RHP_RHH_by_cluster$Cluster)
## [1] 1 0 3 4 2
# RHP vs RHH
plot_heatmap_per_pitchtype(GAM_RR, RHP_RHH_by_cluster, 0, "RHP vs RHH")

plot_heatmap_per_pitchtype(GAM_RR, RHP_RHH_by_cluster, 1, "RHP vs RHH")

plot_heatmap_per_pitchtype(GAM_RR, RHP_RHH_by_cluster, 2, "RHP vs RHH")

plot_heatmap_per_pitchtype(GAM_RR, RHP_RHH_by_cluster, 3, "RHP vs RHH")

plot_heatmap_per_pitchtype(GAM_RR, RHP_RHH_by_cluster, 4, "RHP vs RHH")

# RHP vs LHH
plot_heatmap_per_pitchtype(GAM_RL, RHP_LHH_by_cluster, 0, "RHP vs LHH")

plot_heatmap_per_pitchtype(GAM_RL, RHP_LHH_by_cluster, 1, "RHP vs LHH")

plot_heatmap_per_pitchtype(GAM_RL, RHP_LHH_by_cluster, 2, "RHP vs LHH")

plot_heatmap_per_pitchtype(GAM_RL, RHP_LHH_by_cluster, 3, "RHP vs LHH")

plot_heatmap_per_pitchtype(GAM_RL, RHP_LHH_by_cluster, 4, "RHP vs LHH")

# LHP vs RHH
plot_heatmap_per_pitchtype(GAM_LR, LHP_RHH_by_cluster, 0, "LHP vs RHH")

plot_heatmap_per_pitchtype(GAM_LR, LHP_RHH_by_cluster, 1, "LHP vs RHH")

plot_heatmap_per_pitchtype(GAM_LR, LHP_RHH_by_cluster, 2, "LHP vs RHH")

plot_heatmap_per_pitchtype(GAM_LR, LHP_RHH_by_cluster, 3, "LHP vs RHH")

plot_heatmap_per_pitchtype(GAM_LR, LHP_RHH_by_cluster, 4, "LHP vs RHH")

# LHP vs LHH
plot_heatmap_per_pitchtype(GAM_LL, LHP_LHH_by_cluster, 0, "LHP vs LHH")

plot_heatmap_per_pitchtype(GAM_LL, LHP_LHH_by_cluster, 1, "LHP vs LHH")

plot_heatmap_per_pitchtype(GAM_LL, LHP_LHH_by_cluster, 2, "LHP vs LHH")

plot_heatmap_per_pitchtype(GAM_LL, LHP_LHH_by_cluster, 3, "LHP vs LHH")

plot_heatmap_per_pitchtype(GAM_LL, LHP_LHH_by_cluster, 4, "LHP vs LHH")

Doing Zone-based visualizations
plot_zone_per_pitchtype_RHP <- function(model, data,
pitch_cluster,
pitch_type = NULL,
title = NULL) {
# Strike zone bounds
k_zone_height_max <- 3.67
k_zone_height_min <- 1.52
sides <- c(-0.83, 0.83)
# Mean cluster parameters
cluster_summary_rr <- data %>%
filter(Cluster == pitch_cluster) %>%
summarise(
spinrate = mean(spinrate, na.rm = TRUE),
relspeed = mean(relspeed, na.rm = TRUE),
inducedvertbreak = mean(inducedvertbreak, na.rm = TRUE),
horzbreak = mean(horzbreak, na.rm = TRUE),
initposx = mean(initposx, na.rm = TRUE),
initposz = mean(initposz, na.rm = TRUE)
)
# Grid with small buffer outside zone
grid <- expand.grid(
platelocside = seq(sides[1] - 0.5, sides[2] + 0.5, length.out = 300),
platelocheight = seq(k_zone_height_min - 0.5, k_zone_height_max + 0.5, length.out = 300)
)
# Add mean parameters
for (col in names(cluster_summary_rr)) {
grid[[col]] <- cluster_summary_rr[[col]]
}
grid$Cluster <- pitch_cluster
# Predict probabilities
grid$pred_prob <- predict(model, newdata = grid, type = "response")
# --- Define 25 zones (5×5 grid including chase zones)
# NOTE: length(breaks) = length(labels) + 1, otherwise NA
grid <- grid %>%
mutate(
zone_col = cut(
platelocside,
breaks = seq(sides[1] - 0.5, sides[2] + 0.5, length.out = 6),
labels = c("Far Outside", "Middle-Out", "Middle", "Middle-In", "Inside"),
include.lowest = TRUE
),
zone_row = cut(
platelocheight,
breaks = seq(k_zone_height_min - 0.5, k_zone_height_max + 0.5,
length.out = 6),
labels = c("Low-Out", "Low", "Mid", "High", "High-Out"),
include.lowest = TRUE
)
)
# Average within zones
zone_summary <- grid %>%
group_by(zone_row, zone_col) %>%
summarise(mean_prob = mean(pred_prob, na.rm = TRUE), .groups = "drop")
# Reorder for baseball orientation (High top, Inside right)
zone_summary$zone_row <- factor(zone_summary$zone_row,
levels = c("High-Out", "High", "Mid", "Low", "Low-Out"))
zone_summary$zone_col <- factor(zone_summary$zone_col,
levels = c("Far Outside", "Middle-Out", "Middle", "Middle-In", "Inside"))
zone_summary$zone_col <- factor(zone_summary$zone_col, levels = rev(levels(zone_summary$zone_col))) # inside → right
zone_summary$zone_row <- factor(zone_summary$zone_row, levels = rev(levels(zone_summary$zone_row))) # high → top
p <- ggplot(zone_summary, aes(x = rev(zone_col), y = zone_row, fill = mean_prob)) +
geom_tile(color = "white", linewidth = 1) +
geom_text(aes(label = sprintf("%.2f", mean_prob)), color = "white", size = 4.2) +
scale_fill_viridis_c(option = "C", direction = -1, limits = c(0, 0.5)) +
labs(
title = paste0(title, " Predicted Sweet-Spot Probability\n Pitch Type: ", pitch_cluster, " : ", pitch_type),
x = "Horizontal Zone (Catcher View)",
y = "Vertical Zone",
fill = "Pred. Prob."
) +
coord_equal() +
scale_x_discrete(position = "top") +
theme_minimal(base_size = 14) +
theme(
panel.grid = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5, size=10) ,
plot.title = element_text(size=15, hjust=0.5)
) +
geom_rect(
aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - 0.5,
xmax = as.numeric(factor("Middle-Out", levels = levels(zone_summary$zone_col))) + 0.5,
ymin = as.numeric(factor("Low", levels = levels(zone_summary$zone_row))) - 0.5,
ymax = as.numeric(factor("High", levels = levels(zone_summary$zone_row))) + 0.5),
color = "black", fill = NA, linewidth = 1.5, inherit.aes = FALSE
)
return(p)
}
plot_zone_per_pitchtype_RHP(GAM_RR, RHP_RHH_by_cluster, 0,
"Slider/Cutter","RHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RR, RHP_RHH_by_cluster, 1,
"4-Seam Fastball" ,"RHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RR, RHP_RHH_by_cluster, 2,
"Splitter/Changeup", "RHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RR, RHP_RHH_by_cluster, 3,
"Sinker/2-Seamer", "RHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RR, RHP_RHH_by_cluster, 4,
"Curveball/Vertical Dropper", "RHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RL, RHP_LHH_by_cluster, 0,
"Slider/Cutter", "RHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RL, RHP_LHH_by_cluster, 1,
"4-Seam Fastball", "RHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RL, RHP_LHH_by_cluster, 2,
"Splitter/Changeup", "RHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RL, RHP_LHH_by_cluster, 3,
"Sinker/2-Seamer", "RHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_RL, RHP_LHH_by_cluster, 4,
"Curveball/Vertical Dropper", "RHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LR, LHP_RHH_by_cluster, 0,
"Curveball/Vertical Dropper", "LHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LR, LHP_RHH_by_cluster, 1,
"4-Seam Fastball", "LHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LR, LHP_RHH_by_cluster, 2,
"Slider/Cutter", "LHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LR, LHP_RHH_by_cluster, 3,
"Splitter/Changeup", "LHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LR, LHP_RHH_by_cluster, 4,
"Sinker/2-Seamer", "LHP vs RHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LL, LHP_LHH_by_cluster, 0,
"Curveball/Vertical Dropper", "LHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LL, LHP_LHH_by_cluster, 1,
"4-Seam Fastball", "LHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LL, LHP_LHH_by_cluster, 2,
"Slider/Cutter", "LHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LL, LHP_LHH_by_cluster, 3,
"Splitter/Changeup", "LHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP(GAM_LL, LHP_LHH_by_cluster, 4,
"Sinker/2-Seamer", "LHP vs LHH")
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_col` is discouraged.
## ℹ Use `zone_col` instead.
## Warning: Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Use of `zone_summary$zone_row` is discouraged.
## ℹ Use `zone_row` instead.
## Warning in geom_rect(aes(xmin = as.numeric(factor("Middle-In", levels = levels(zone_summary$zone_col))) - : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

Ranking Pitches for each platoon
plot_zone_per_pitchtype_RHP_rev <- function(model, data,
pitch_cluster,
pitch_type = NULL,
title = NULL) {
# Strike zone bounds
k_zone_height_max <- 3.67
k_zone_height_min <- 1.52
sides <- c(-0.83, 0.83)
# Define zone width/height and breakpoints
zone_width <- diff(sides)
zone_height <- k_zone_height_max - k_zone_height_min
x_breaks <- seq(sides[1] - zone_width/3,
sides[2] + zone_width/3,
length.out = 6)
y_breaks <- seq(k_zone_height_min - zone_height/3,
k_zone_height_max + zone_height/3,
length.out = 6)
# Cluster means for fixed covariates
cluster_summary_rr <- data %>%
filter(Cluster == pitch_cluster) %>%
summarise(across(c(spinrate, relspeed, inducedvertbreak,
horzbreak, initposx, initposz), ~mean(.x, na.rm=TRUE)))
# Prediction grid
grid <- expand.grid(
platelocside = seq(sides[1] - 0.5, sides[2] + 0.5, length.out = 300),
platelocheight = seq(k_zone_height_min - 0.5, k_zone_height_max + 0.5, length.out = 300)
)
for (col in names(cluster_summary_rr)) grid[[col]] <- cluster_summary_rr[[col]]
grid$Cluster <- pitch_cluster
# Predict probabilities
grid$pred_prob <- predict(model, newdata = grid, type = "response")
# Average within bins and compute boundaries
zone_summary <- grid %>%
mutate(
x_bin = cut(platelocside, breaks = x_breaks, include.lowest = TRUE),
y_bin = cut(platelocheight, breaks = y_breaks, include.lowest = TRUE)
) %>%
group_by(x_bin, y_bin) %>%
summarise(
mean_prob = mean(pred_prob, na.rm = TRUE),
x_min = min(platelocside),
x_max = max(platelocside),
y_min = min(platelocheight),
y_max = max(platelocheight),
.groups = "drop"
) %>%
mutate(
x_center = (x_min + x_max)/2,
y_center = (y_min + y_max)/2
)
# ---- Plot using real coordinates (rectangles) ----
p <- ggplot(zone_summary) +
geom_rect(
aes(xmin = x_min, xmax = x_max,
ymin = y_min, ymax = y_max,
fill = mean_prob),
color = "white", linewidth = 0.8
) +
geom_text(aes(x = x_center, y = y_center,
label = sprintf("%.2f", mean_prob)),
color = "white", size = 3.8) +
# True strike-zone outline
geom_rect(aes(xmin = sides[1], xmax = sides[2],
ymin = k_zone_height_min, ymax = k_zone_height_max),
color = "black", fill = NA, linewidth = 1.3) +
scale_fill_viridis_c(option = "C", direction = -1, limits = c(0, 0.5)) +
coord_equal() +
labs(
title = paste0(title, " — Pitch Profile: ", pitch_type,
" (Cluster ", pitch_cluster, ")"),
x = "Horizontal Location (ft, Catcher View)",
y = "Vertical Location (ft)",
fill = "Pred. Prob."
) +
theme_minimal(base_size = 14) +
theme(
panel.grid = element_blank(),
plot.title = element_text(size = 13, hjust = 0.5)
)
return(p)
}
plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 0,
"Slider/Cutter","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 1,
"4-Seam Fastball","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 2,
"Splitter/Changeup","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 3,
"Sinker/2-Seamer","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 4,
"Curveball/Vertical Dropper","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 0,
"Slider/Cutter","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 1,
"4-Seam Fastball","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 2,
"Splitter/Changeup","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 3,
"Sinker/2-Seamer","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 4,
"Curveball/Vertical Dropper","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 0,
"Curveball/Vertical Dropper", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 1,
"4-Seam Fastball", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 2,
"Slider/Cutter", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 3,
"Splitter/Changeup", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 4,
"Sinker/2-Seamer", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 0,
"Curveball/Vertical Dropper", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 1,
"4-Seam Fastball", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 2,
"Slider/Cutter", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 3,
"Splitter/Changeup", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 4,
"Sinker/2-Seamer", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

# RR
RHP_RHH_by_cluster$Prediction <- predict(GAM_RR,
newdata=RHP_RHH_by_cluster,
type="response")
pitch_types_ranked_RR <- RHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
pitch_types_ranked_RR
## # A tibble: 5 × 5
## Cluster mean_pred_prob se_pred_prob n Rank
## <dbl> <dbl> <dbl> <int> <int>
## 1 3 0.207 0.000320 34464 1
## 2 2 0.191 0.000728 8113 2
## 3 0 0.179 0.000291 38163 3
## 4 4 0.168 0.000421 22204 4
## 5 1 0.129 0.000216 41876 5
# RL
RHP_LHH_by_cluster$Prediction <- predict(GAM_RL,
newdata=RHP_LHH_by_cluster,
type="response")
pitch_types_ranked_RL <- RHP_LHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
pitch_types_ranked_RL
## # A tibble: 5 × 5
## Cluster mean_pred_prob se_pred_prob n Rank
## <dbl> <dbl> <dbl> <int> <int>
## 1 2 0.188 0.000426 19571 1
## 2 3 0.180 0.000366 24419 2
## 3 4 0.159 0.000458 15520 3
## 4 0 0.138 0.000335 20988 4
## 5 1 0.120 0.000198 42944 5
# LR
LHP_RHH_by_cluster$Prediction <- predict(GAM_LR,
newdata=LHP_RHH_by_cluster,
type="response")
pitch_types_ranked_LR <- LHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
pitch_types_ranked_LR
## # A tibble: 5 × 5
## Cluster mean_pred_prob se_pred_prob n Rank
## <dbl> <dbl> <dbl> <int> <int>
## 1 3 0.199 0.000484 13072 1
## 2 4 0.186 0.000385 15923 2
## 3 0 0.158 0.000544 8841 3
## 4 2 0.146 0.000381 12011 4
## 5 1 0.119 0.000234 23124 5
# LL
LHP_LHH_by_cluster$Prediction <- predict(GAM_LL,
newdata=LHP_LHH_by_cluster,
type="response")
pitch_types_ranked_LL <- LHP_LHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
Pitch Ranking Great Table
library(gt)
pitch_types_left_left <- c("Splitter/Changeup",
"Sinker/2-Seam Fastball",
"Slider/Cutter",
"Curveball/Vertical Dropper",
"4-Seam Fastball")
pitch_types_ranked_LL <- pitch_types_ranked_LL %>%
mutate(
Pitch_Type = pitch_types_left_left,
Relative_to_Avg = mean_pred_prob / mean(mean_pred_prob, na.rm = TRUE)
)
# Create the gt table
gt_LL <- pitch_types_ranked_LL %>%
gt() %>%
cols_hide(columns = c(se_pred_prob)) %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
mean_pred_prob, Relative_to_Avg)) %>%
fmt_number(columns = c(mean_pred_prob, se_pred_prob, Relative_to_Avg), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
mean_pred_prob = "Mean Predicted Probability",
n = "Sample Size",
Rank = "Rank",
Relative_to_Avg = "Relative to Average"
) %>%
tab_header(
title = "Ranked Optimal GIDP Contact Probabilities by Pitch Profile",
subtitle = "LHP vs. LHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_LL$Rank
)
)
) %>%
data_color(
columns = c(Relative_to_Avg),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_LL$Relative_to_Avg
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, mean_pred_prob, Relative_to_Avg))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
## Warning: Since gt v0.6.0 `fmt_missing()` is deprecated and will soon be removed.
## ℹ Use `sub_missing()` instead.
## This warning is displayed once every 8 hours.
## Warning: Since gt v0.9.0, the `colors` argument has been deprecated.
## • Please use the `fn` argument instead.
## This warning is displayed once every 8 hours.
gt_LL
| Ranked Optimal GIDP Contact Probabilities by Pitch Profile |
| LHP vs. LHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Mean Predicted Probability |
Relative to Average |
Sample Size |
| 1 |
Splitter/Changeup |
3 |
0.198 |
1.173 |
984 |
| 2 |
Sinker/2-Seam Fastball |
4 |
0.196 |
1.161 |
7234 |
| 3 |
Slider/Cutter |
2 |
0.168 |
0.998 |
6687 |
| 4 |
Curveball/Vertical Dropper |
0 |
0.151 |
0.895 |
4282 |
| 5 |
4-Seam Fastball |
1 |
0.130 |
0.773 |
7184 |
pitch_types_left_right <- c("Splitter/Changeup",
"Sinker/2-Seam Fastball",
"Curveball/Vertical Dropper",
"Slider/Cutter",
"4-Seam Fastball")
pitch_types_ranked_LR <- pitch_types_ranked_LR %>%
mutate(
Pitch_Type = pitch_types_left_right,
Relative_to_Avg = mean_pred_prob / mean(mean_pred_prob, na.rm = TRUE)
)
# Create the gt table
gt_LR <- pitch_types_ranked_LR %>%
gt() %>%
cols_hide(columns = c(se_pred_prob)) %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
mean_pred_prob, Relative_to_Avg)) %>%
fmt_number(columns = c(mean_pred_prob, se_pred_prob, Relative_to_Avg), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
mean_pred_prob = "Mean Predicted Probability",
n = "Sample Size",
Rank = "Rank",
Relative_to_Avg = "Relative to Average"
) %>%
tab_header(
title = "Ranked Optimal GIDP Contact Probabilities by Pitch Profile",
subtitle = "LHP vs. RHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_LR$Rank
)
)
) %>%
data_color(
columns = c(Relative_to_Avg),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_LR$Relative_to_Avg
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, mean_pred_prob, Relative_to_Avg))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
gt_LR
| Ranked Optimal GIDP Contact Probabilities by Pitch Profile |
| LHP vs. RHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Mean Predicted Probability |
Relative to Average |
Sample Size |
| 1 |
Splitter/Changeup |
3 |
0.199 |
1.228 |
13072 |
| 2 |
Sinker/2-Seam Fastball |
4 |
0.186 |
1.152 |
15923 |
| 3 |
Curveball/Vertical Dropper |
0 |
0.158 |
0.977 |
8841 |
| 4 |
Slider/Cutter |
2 |
0.146 |
0.905 |
12011 |
| 5 |
4-Seam Fastball |
1 |
0.119 |
0.738 |
23124 |
pitch_types_right_left <- c("Splitter/Changeup",
"Sinker/2-Seam Fastball",
"Curveball/Vertical Dropper",
"Slider/Cutter",
"4-Seam Fastball")
pitch_types_ranked_RL <- pitch_types_ranked_RL %>%
mutate(
Pitch_Type = pitch_types_right_left,
Relative_to_Avg = mean_pred_prob / mean(mean_pred_prob, na.rm = TRUE)
)
# Create the gt table
gt_RL <- pitch_types_ranked_RL %>%
gt() %>%
cols_hide(columns = c(se_pred_prob)) %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
mean_pred_prob, Relative_to_Avg)) %>%
fmt_number(columns = c(mean_pred_prob, se_pred_prob, Relative_to_Avg), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
mean_pred_prob = "Mean Predicted Probability",
n = "Sample Size",
Rank = "Rank",
Relative_to_Avg = "Relative to Average"
) %>%
tab_header(
title = "Ranked Optimal GIDP Contact Probabilities by Pitch Profile",
subtitle = "RHP vs. LHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_RL$Rank
)
)
) %>%
data_color(
columns = c(Relative_to_Avg),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_RL$Relative_to_Avg
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, mean_pred_prob, Relative_to_Avg))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
gt_RL
| Ranked Optimal GIDP Contact Probabilities by Pitch Profile |
| RHP vs. LHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Mean Predicted Probability |
Relative to Average |
Sample Size |
| 1 |
Splitter/Changeup |
2 |
0.188 |
1.198 |
19571 |
| 2 |
Sinker/2-Seam Fastball |
3 |
0.180 |
1.145 |
24419 |
| 3 |
Curveball/Vertical Dropper |
4 |
0.159 |
1.013 |
15520 |
| 4 |
Slider/Cutter |
0 |
0.138 |
0.880 |
20988 |
| 5 |
4-Seam Fastball |
1 |
0.120 |
0.764 |
42944 |
pitch_types_right_right <- c("Sinker/2-Seam Fastball",
"Splitter/Changeup",
"Slider/Cutter",
"Curveball/Vertical Dropper",
"4-Seam Fastball")
pitch_types_ranked_RR <- pitch_types_ranked_RR %>%
mutate(
Pitch_Type = pitch_types_right_right,
Relative_to_Avg = mean_pred_prob / mean(mean_pred_prob, na.rm = TRUE)
)
# Create the gt table
gt_RR <- pitch_types_ranked_RR %>%
gt() %>%
cols_hide(columns = c(se_pred_prob)) %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
mean_pred_prob, Relative_to_Avg)) %>%
fmt_number(columns = c(mean_pred_prob, se_pred_prob, Relative_to_Avg), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
mean_pred_prob = "Mean Predicted Probability",
n = "Sample Size",
Rank = "Rank",
Relative_to_Avg = "Relative to Average"
) %>%
tab_header(
title = "Ranked Optimal GIDP Contact Probabilities by Pitch Profile",
subtitle = "RHP vs. RHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_RR$Rank
)
)
) %>%
data_color(
columns = c(Relative_to_Avg),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_RR$Relative_to_Avg
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, mean_pred_prob, Relative_to_Avg))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
gt_RR
| Ranked Optimal GIDP Contact Probabilities by Pitch Profile |
| RHP vs. RHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Mean Predicted Probability |
Relative to Average |
Sample Size |
| 1 |
Sinker/2-Seam Fastball |
3 |
0.207 |
1.183 |
34464 |
| 2 |
Splitter/Changeup |
2 |
0.191 |
1.092 |
8113 |
| 3 |
Slider/Cutter |
0 |
0.179 |
1.024 |
38163 |
| 4 |
Curveball/Vertical Dropper |
4 |
0.168 |
0.963 |
22204 |
| 5 |
4-Seam Fastball |
1 |
0.129 |
0.738 |
41876 |
library(gratia)
smooth_eff_RR <- gratia::smooth_estimates(GAM_RR)
feature_rank_RR <- smooth_eff_RR %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_RR
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 ti(platelocside,platelocheight) 6.55 0.947
## 2 s(platelocside) 3.09 0.838
## 3 s(relspeed) 2.17 0.697
## 4 s(inducedvertbreak) 2.11 0.584
## 5 s(platelocheight) 1.91 0.596
## 6 ti(initposx,initposz) 1.69 0.198
## 7 s(horzbreak) 0.940 0.277
## 8 s(initposz) 0.780 0.229
## 9 s(spinrate) 0.301 0.0881
## 10 s(initposx) 0.0644 0.0192
smooth_eff_RL <- gratia::smooth_estimates(GAM_RL)
feature_rank_RL <- smooth_eff_RL %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_RL
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 s(inducedvertbreak) 2.65 0.710
## 2 s(platelocside) 2.46 0.730
## 3 s(relspeed) 1.98 0.662
## 4 ti(platelocside,platelocheight) 1.73 0.251
## 5 s(platelocheight) 1.37 0.425
## 6 s(horzbreak) 0.780 0.204
## 7 s(initposx) 0.678 0.203
## 8 ti(initposx,initposz) 0.451 0.0765
## 9 s(spinrate) 0.282 0.0914
## 10 s(initposz) 0.218 0.0599
smooth_eff_LR <- gratia::smooth_estimates(GAM_LR)
feature_rank_LR <- smooth_eff_LR %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_LR
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 ti(platelocside,platelocheight) 26.7 2.80
## 2 s(platelocside) 4.84 1.51
## 3 s(inducedvertbreak) 3.30 0.884
## 4 s(horzbreak) 3.25 0.848
## 5 s(platelocheight) 1.75 0.513
## 6 s(relspeed) 1.57 0.539
## 7 ti(initposx,initposz) 0.672 0.0803
## 8 s(spinrate) 0.639 0.214
## 9 s(initposz) 0.255 0.0871
## 10 s(initposx) 0.248 0.0727
smooth_eff_LL <- gratia::smooth_estimates(GAM_LL)
feature_rank_LL <- smooth_eff_LL %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_LL
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 ti(platelocside,platelocheight) 15.0 1.84
## 2 s(inducedvertbreak) 8.46 2.72
## 3 s(platelocheight) 2.15 0.629
## 4 s(platelocside) 1.75 0.483
## 5 s(relspeed) 1.67 0.518
## 6 s(initposz) 0.887 0.260
## 7 s(horzbreak) 0.829 0.289
## 8 s(spinrate) 0.309 0.0905
## 9 s(initposx) 0.165 0.0482
## 10 ti(initposx,initposz) 0.159 0.0235